In [1]:
import pandas as pd
from datetime import datetime
sf_file = "/home/sajit/coursera/ds_scale/systems_algos/assignments/datasci_course_materials/assignment6/sanfrancisco_incidents_summer_2014.csv"
sf = pd.read_csv(sf_file)
seattle_file = "/home/sajit/coursera/ds_scale/systems_algos/assignments/datasci_course_materials/assignment6/seattle_incidents_summer_2014.csv"
seattle = pd.read_csv(seattle_file)
seattle.columns = [c.replace(' ', '_') for c in seattle.columns]
seattle.columns = [c.replace('/', '_') for c in seattle.columns]
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py:2723: DtypeWarning: Columns (9) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
In [2]:
# Drop all columns that we won't be using 
seattle.drop(['RMS_CDW_ID','General_Offense_Number', 'Offense_Code',
       'Offense_Code_Extension', 'Offense_Type', 'Summary_Offense_Code','Date_Reported', 'Occurred_Date_Range_End',
       'Hundred_Block_Location', 'Zone_Beat',
       'Census_Tract_2000', 'Longitude', 'Latitude', 'Location','Year'],inplace=True,axis=1,errors='ignore')
seattle.columns.values
Out[2]:
array(['Summarized_Offense_Description',
       'Occurred_Date_or_Date_Range_Start', 'District_Sector', 'Month'], dtype=object)
In [3]:
day_of_week_dict = {
    0: 'Monday',
    1: 'Tuesday',
    2: 'Wednesday',
    3: 'Thursday',
    4: 'Friday',
    5: 'Saturday',
    6: 'Sunday'
}
month_dict = {
    6 : 'June',
    7: 'July',
    8: 'August'
}

def seattle_mapper(x):
    format =  '%m/%d/%Y %I:%M:%S %p'
    my_date = datetime.strptime(x,format)
    hour_slot = 0
    my_hour = my_date.hour
    if 6 <= my_hour < 12:
        hour_slot = 1
    elif 12 <= my_hour < 18:
        hour_slot = 2
    elif my_hour >= 18:
        hour_slot = 3
    return pd.Series({'time_of_day': hour_slot, 'day_of_week': day_of_week_dict[my_date.weekday()], 'date': my_date.date(),
                     'month_label': month_dict[my_date.month]})

newcols = seattle['Occurred_Date_or_Date_Range_Start'].apply(seattle_mapper)

seattle = seattle.join(newcols)

seattle.columns.values
Out[3]:
array(['Summarized_Offense_Description',
       'Occurred_Date_or_Date_Range_Start', 'District_Sector', 'Month',
       'date', 'day_of_week', 'month_label', 'time_of_day'], dtype=object)
In [4]:
def mapper_sf_time(x):
    hour = int(x.split(':')[0])
    if 0<= hour < 6:
        return 0;
    elif 6<= hour < 12:
        return 1;
    elif 12 <= hour < 18:
        return 2;
    else:
        return 3

def sf_map_date(x):
    format = '%m/%d/%Y'
    my_date = datetime.strptime(x,format)
    return month_dict[my_date.month]


sf['month_label'] = sf['Date'].apply(sf_map_date)
sf['time_of_day'] = sf['Time'].apply(mapper_sf_time)


# Drop all columns that we won't be using 
sf.drop(['Descript', 'Time',
       'Resolution', 'Address', 'X','Y', 'Location',
       'PdId'],inplace=True,axis=1,errors='ignore')
sf.columns.values
Out[4]:
array(['IncidntNum', 'Category', 'DayOfWeek', 'Date', 'PdDistrict',
       'month_label', 'time_of_day'], dtype=object)
In [5]:
from bokeh.charts import Bar, output_notebook, show
from bokeh.models import HoverTool
output_notebook()


seattle_time_slot_crimes = Bar(seattle, 'time_of_day', values='Summarized_Offense_Description', stack="Summarized_Offense_Description",title="Seattle:Incidents by time of the day",agg='count',
       xlabel="Time of the day",ylabel="Count", tools='hover')
seattle_time_slot_crimes.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Summarized_Offense_Description"}
show(seattle_time_slot_crimes)
BokehJS successfully loaded
                inspect
                • Hover Tool
                  Out[5]:

                  <Bokeh Notebook handle for In[5]>

                  In [6]:
                  sf_time_slot_crimes = Bar(sf, 'time_of_day', values='Category', stack="Category",title="SF:Incidents by time of the day",agg='count',
                         xlabel="Time of the day",ylabel="Count",tools='hover')
                  sf_time_slot_crimes.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Category"}
                  show(sf_time_slot_crimes)
                  
                                inspect
                                • Hover Tool
                                  Out[6]:

                                  <Bokeh Notebook handle for In[6]>

                                  In [7]:
                                  seattle_day_ofweek = Bar(seattle, 'day_of_week', values='Summarized_Offense_Description', stack="Summarized_Offense_Description",title="Seattle:Incidents by day of the week",agg='count',
                                         xlabel="Day of Week",ylabel="Count",tools='hover')
                                  seattle_day_ofweek.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Summarized_Offense_Description"}
                                  
                                  show(seattle_day_ofweek)
                                  
                                                inspect
                                                • Hover Tool
                                                  Out[7]:

                                                  <Bokeh Notebook handle for In[7]>

                                                  In [8]:
                                                  sf_day_ofweek = Bar(sf, 'DayOfWeek', values='Category', title="SF:Incidents by day of the week",agg='count',
                                                         xlabel="Day of Week",ylabel="Count",stack="Category",tools='hover')
                                                  sf_day_ofweek.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Category"}
                                                  show(sf_day_ofweek)
                                                  
                                                                inspect
                                                                • Hover Tool
                                                                  Out[8]:

                                                                  <Bokeh Notebook handle for In[8]>

                                                                  In [9]:
                                                                  seattle_month = Bar(seattle, 'month_label', values='Summarized_Offense_Description',stack="Summarized_Offense_Description", title="Seattle:Incidents by month",agg='count',
                                                                         xlabel="Month",ylabel="Count",tools='hover')
                                                                  seattle_month.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Summarized_Offense_Description"}
                                                                  
                                                                  show(seattle_month)
                                                                  
                                                                                inspect
                                                                                • Hover Tool
                                                                                  Out[9]:

                                                                                  <Bokeh Notebook handle for In[9]>

                                                                                  In [10]:
                                                                                  sf_month = Bar(sf, label='month_label', values='Category',stack='Category', title="SF:Incidents by month",agg='count',
                                                                                         xlabel="Month",ylabel="Count",tools='hover')
                                                                                  sf_month.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Category"}
                                                                                  show(sf_month)
                                                                                  
                                                                                                inspect
                                                                                                • Hover Tool
                                                                                                  Out[10]:

                                                                                                  <Bokeh Notebook handle for In[10]>

                                                                                                  In [11]:
                                                                                                  seattle_pd = Bar(seattle, 'District_Sector', values='Summarized_Offense_Description',stack='Summarized_Offense_Description', title="Seattle:Incidents by PD",agg='count',
                                                                                                         xlabel="PD",ylabel="Count",tools='hover')
                                                                                                  seattle_pd.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Summarized_Offense_Description"}
                                                                                                  show(seattle_pd)
                                                                                                  
                                                                                                                inspect
                                                                                                                • Hover Tool
                                                                                                                  Out[11]:

                                                                                                                  <Bokeh Notebook handle for In[11]>

                                                                                                                  In [12]:
                                                                                                                  sf_pd = Bar(sf, 'PdDistrict', values='Category', title="Incidents by PD",agg='count',
                                                                                                                         xlabel="PD",ylabel="Count",stack="Category",tools="hover")
                                                                                                                  sf_pd.select(dict(type=HoverTool)).tooltips = {"Offense Type" : "@Category"}
                                                                                                                  show(sf_pd)
                                                                                                                  
                                                                                                                                inspect
                                                                                                                                • Hover Tool
                                                                                                                                  Out[12]:

                                                                                                                                  <Bokeh Notebook handle for In[12]>

                                                                                                                                  Lets see how crimes vary by the time of the day and day of the week

                                                                                                                                  Correlated incidentsΒΆ

                                                                                                                                  In [35]:
                                                                                                                                  result = seattle.groupby(['Summarized_Offense_Description','day_of_week','time_of_day'],squeeze=True)['Summarized_Offense_Description'].count()
                                                                                                                                  
                                                                                                                                  In [45]:
                                                                                                                                  result['CAR PROWL']
                                                                                                                                  
                                                                                                                                  Out[45]:
                                                                                                                                  day_of_week  time_of_day
                                                                                                                                  Friday       0              115
                                                                                                                                               1              150
                                                                                                                                               2              207
                                                                                                                                               3              449
                                                                                                                                  Monday       0              142
                                                                                                                                               1              142
                                                                                                                                               2              155
                                                                                                                                               3              343
                                                                                                                                  Saturday     0              157
                                                                                                                                               1              127
                                                                                                                                               2              251
                                                                                                                                               3              410
                                                                                                                                  Sunday       0              214
                                                                                                                                               1              121
                                                                                                                                               2              284
                                                                                                                                               3              386
                                                                                                                                  Thursday     0              148
                                                                                                                                               1              109
                                                                                                                                               2              235
                                                                                                                                               3              386
                                                                                                                                  Tuesday      0              140
                                                                                                                                               1              143
                                                                                                                                               2              180
                                                                                                                                               3              361
                                                                                                                                  Wednesday    0              144
                                                                                                                                               1              146
                                                                                                                                               2              215
                                                                                                                                               3              370
                                                                                                                                  Name: Summarized_Offense_Description, dtype: int64
                                                                                                                                  In [47]:
                                                                                                                                  result['ASSAULT']
                                                                                                                                  
                                                                                                                                  Out[47]:
                                                                                                                                  day_of_week  time_of_day
                                                                                                                                  Friday       0               47
                                                                                                                                               1               48
                                                                                                                                               2               68
                                                                                                                                               3               90
                                                                                                                                  Monday       0               31
                                                                                                                                               1               40
                                                                                                                                               2               98
                                                                                                                                               3               85
                                                                                                                                  Saturday     0              119
                                                                                                                                               1               44
                                                                                                                                               2               73
                                                                                                                                               3              137
                                                                                                                                  Sunday       0              102
                                                                                                                                               1               35
                                                                                                                                               2               93
                                                                                                                                               3              103
                                                                                                                                  Thursday     0               42
                                                                                                                                               1               46
                                                                                                                                               2               88
                                                                                                                                               3               97
                                                                                                                                  Tuesday      0               35
                                                                                                                                               1               44
                                                                                                                                               2               54
                                                                                                                                               3               92
                                                                                                                                  Wednesday    0               44
                                                                                                                                               1               56
                                                                                                                                               2              105
                                                                                                                                               3              102
                                                                                                                                  Name: Summarized_Offense_Description, dtype: int64
                                                                                                                                  In [ ]: